import numpy as np
import pandas as pd
import datetime as dt
import calendar
#import pycountry_convert as pc
#import pycountry as pctry
import yfinance as yf
%matplotlib inline
import matplotlib.pyplot as plt
import hvplot.pandas
import panel as pn
import plotly.express as px
from pathlib import Path
from dotenv import load_dotenv
from panel.interact import interact
import plotly.graph_objects as go
# Download daily prices for the market fund and 15 sector funds, then derive yearly volatility.
# Ticker symbol -> display name, in the column order used by the rest of the analysis.
_SECTOR_TICKERS = {
    'VTSAX': 'Total Stock Market',
    'IYZ': 'Communications',
    'XLB': 'Materials',
    'XLY': 'Consumer Discretionary',
    'XLP': 'Consumer Staples',
    'XLE': 'Energy',
    'IYF': 'Financials',
    'XLV': 'Health Care',
    'XLI': 'Industrial',
    'XLK': 'Technology',
    'IYR': 'Real Estate',
    'XLU': 'Utilities',
    'IBB': 'BioTech',
    'ITA': 'Defense',
    'SOXX': 'Semiconductor',
    'ITB': 'Home Construction',
}
tickersY = yf.Tickers(' '.join(_SECTOR_TICKERS))
df_hist = tickersY.history(start='2000-01-01', end='2020-06-04')
# Rename by ticker symbol instead of assigning names by position: the 'Close' columns are
# labelled with the symbols, and their order is not guaranteed to match the request order
# (multi-ticker downloads typically come back sorted by symbol), so a positional assignment
# can silently attach the wrong sector name to a price series.
sector_df = df_hist['Close'][list(_SECTOR_TICKERS)].rename(columns=_SECTOR_TICKERS)
# Optional CSV cache of the download (kept disabled, as in the original notebook):
#sector_df.to_csv("yfinance_sector_data.csv")
#sector_df=pd.read_csv("yfinance_sector_data.csv", infer_datetime_format=True)
#sector_df.set_index("Date",inplace=True)
sector_df.head(3)
# Day-over-day fractional change of each closing-price column.
sector_df_stddev = sector_df.pct_change()
sector_df_stddev.index
# 30-row rolling standard deviation of those changes, averaged within each calendar year.
std_dev_yearly = sector_df_stddev.rolling(window=30).std().groupby(sector_df_stddev.index.year).mean()
std_dev_yearly.head(3)
std_dev_yearly.plot(figsize=(15,8))
# Split the yearly volatility table into the market benchmark and two sector groups.
df_Total_Stock_Market = std_dev_yearly[['Total Stock Market']]
# .copy() makes each group an independent frame, so adding the 'average' column below
# does not write onto a slice of std_dev_yearly (avoids SettingWithCopyWarning).
df_cat_defensive = std_dev_yearly[['Health Care','Consumer Staples','Utilities','Defense','Home Construction']].copy()
# Each sector is listed once; a repeated name would be counted twice in the row mean.
df_cat_aggressive = std_dev_yearly[['Communications','Materials','Financials','Technology','BioTech','Semiconductor','Energy','Industrial']].copy()
# Row-wise mean across the group's sectors, stored as a new 'average' column for plotting.
df_cat_defensive['average'] = df_cat_defensive.mean(numeric_only=True, axis=1)
df_cat_aggressive['average'] = df_cat_aggressive.mean(numeric_only=True, axis=1)
df_Total_Stock_Market.head(2)
df_cat_defensive.head(2)
df_cat_aggressive.head(2)
# Line chart of the yearly averages: defensive group, aggressive group, total market.
# The defensive average creates the figure and its axes.
ax = df_cat_defensive[['average']].plot(figsize=(20,10))
# The other two series are drawn onto the same axes, in the order the legend expects.
for _overlay in (df_cat_aggressive[['average']], df_Total_Stock_Market):
    _overlay.plot(ax=ax)
# Replace the default column-name legend entries with readable labels.
ax.legend(["Defensive Stocks", "Aggressive Stocks","Total Stock Market"]);
# Grouped bar chart: 'average' column of the aggressive vs. defensive frames, one pair per index value.
f, ax1 = plt.subplots(figsize=(12,7))
# Bar width; the defensive bars are shifted right by one width so each pair sits side by side.
w = 0.3
# Both series are drawn on the SAME y-axis. They come from the same table and share a unit,
# and a twin axis would autoscale each series independently, making bar heights incomparable.
aggr = ax1.bar(df_cat_aggressive.index, df_cat_aggressive['average'], width=w, color="#346e60", align='center')
defs = ax1.bar(df_cat_defensive.index + w, df_cat_defensive['average'], width=w, color="orange", align='center')
# Put each tick between its pair of bars and label it with the index value.
ax1.set_xticks(df_cat_aggressive.index + w/2)
ax1.set_xticklabels(df_cat_aggressive.index, rotation='vertical')
ax1.legend([aggr, defs], ['Aggressive Stocks', 'Defensive Stocks'])
plt.show()
# Put the two 'average' columns side by side, keeping only index values present in both frames.
df_cat_joined = pd.concat(
    [frame[['average']] for frame in (df_cat_aggressive, df_cat_defensive)],
    axis=1,
    join='inner',
)
df_cat_joined.head(2)
# Both columns are still called 'average' at this point; give them distinct names.
df_cat_joined.columns = ['Avg. Aggressive', 'Avg. Defensive']
df_cat_joined.head(2)
df_cat_joined.plot.bar(figsize=(12,8))
# Load the per-year jump series and overlay it on the volatility line chart.
dfjumps_by_year = pd.read_csv("./data/jumps_by_year.csv")
dfjumps_by_year.head(2)
# Keep only the 'ScaledVal' column, indexed by 'Date'.
dfjumps_by_year = dfjumps_by_year[['Date','ScaledVal']].set_index('Date')
dfjumps_by_year.head(3)
# The defensive average creates the axes; the remaining series are added in legend order.
ax = df_cat_defensive[['average']].plot(figsize=(20,10))
for _overlay in (df_cat_aggressive[['average']], df_Total_Stock_Market, dfjumps_by_year):
    _overlay.plot(ax=ax)
# Readable legend entries, one per plotted series.
ax.legend(["Defensive Stocks", "Aggressive Stocks","Total Stock Market Normal","Total Stock Market Jumps"]);
def scaler(seq, rng):
    """Min-max rescale a pandas Series into the interval ``[rng[0], rng[1]]``.

    Parameters
    ----------
    seq : pandas.Series
        Values to rescale; its index is reused for the result.
    rng : sequence of two numbers
        Target ``(low, high)`` bounds. The smallest input maps to ``low``
        and the largest to ``high``.

    Returns
    -------
    pandas.DataFrame
        A single-column frame (column label ``0``) indexed like ``seq``.
        A constant input maps every value to ``low``; an empty input
        yields an empty frame.
    """
    if len(seq) == 0:
        return pd.DataFrame([], seq.index)
    low = rng[0]
    span = rng[1] - low
    # Computed once up front; they do not change while iterating.
    mini = seq.min()
    maxi = seq.max()
    if maxi == mini:
        # No spread to normalise by; avoid dividing by zero.
        scaled = [float(low) for _ in seq]
    else:
        width = maxi - mini
        # The lower bound is added back so the output really starts at rng[0].
        scaled = [low + span * ((x - mini) / width) for x in seq]
    return pd.DataFrame(scaled, seq.index)
# Load the deaths / affected table, rescale both columns with scaler(), and overlay them
# on the volatility and jump series.
df_usa_deaths_affected = pd.read_csv("./data/P1_df_usa_deaths_affected.csv")
df_usa_deaths_affected.columns = ['Date', 'Total Deaths', 'No Affected']
df_usa_deaths_affected = df_usa_deaths_affected.set_index("Date")
df_usa_deaths_affected.head(3)
# Each raw column gets a companion column rescaled with the range [0, .03].
for _raw, _scaled in (('Total Deaths', 'Scaled Deaths'), ('No Affected', 'Scaled Affected')):
    df_usa_deaths_affected[_scaled] = scaler(df_usa_deaths_affected[_raw], [0,.03])
df_usa_deaths_affected.head(2)
# The defensive average creates the axes; everything else is added in legend order.
ax = df_cat_defensive[['average']].plot(figsize=(20,10))
for _overlay in (
    df_cat_aggressive[['average']],
    df_Total_Stock_Market,
    dfjumps_by_year,
    df_usa_deaths_affected['Scaled Deaths'],
    df_usa_deaths_affected['Scaled Affected'],
):
    _overlay.plot(ax=ax)
# Readable legend entries, one per plotted series.
ax.legend(["Defensive Stocks", "Aggressive Stocks","Total Stock Market Normal","Total Stock Market Jumps","Total Deaths","Total Affected"]);
# Load the disaster and jump/std-dev tables, index both by 'Date', and join them column-wise.
df_disasters_unscaled = pd.read_csv('./data/df_disasters_unscaled.csv')
df_jumps_and_std_dev = pd.read_csv('./data/jumps_and_std_dev.csv')
# Both files get the same treatment: the unnamed first column becomes 'Date',
# is shifted up by 2000, and then becomes the index.
for _frame in (df_jumps_and_std_dev, df_disasters_unscaled):
    _frame.rename(columns={"Unnamed: 0": "Date"}, inplace=True)
    _frame["Date"] = _frame["Date"] + 2000
    _frame.set_index("Date", inplace=True)
df_jumps_and_std_dev.head(3)
df_disasters_unscaled.head(3)
combined_df = pd.concat([df_disasters_unscaled, df_jumps_and_std_dev], axis=1)
combined_df.head(4)
# Multiply by 100 and round to two decimals.
combined_df['Average Std Dev'] = (combined_df['Average Std Dev'] * 100).round(2)
combined_df = combined_df.sort_values("Date")
# Parallel-categories figure over all columns, coloured by the index values.
figi = px.parallel_categories(combined_df, height=800, color=combined_df.index)
figi.show()
# Load the yearly total-returns table (indexed by its "Date" column) and chart it.
# infer_datetime_format was removed from the call: it is deprecated since pandas 2.0 and
# only ever applied together with parse_dates, which is not requested here.
df_total_rets_year = pd.read_csv("./data/total_rets_year.csv", index_col="Date")
df_total_rets_year.head(3)
df_total_rets_year.describe()
df_total_rets_year.plot(figsize=(15,8))
hvplot.plot(df_total_rets_year, kind="line", width=1000, height=600)
# Take the "mean" row of describe() and transpose it: one row per CSV column, one column "mean".
df_total_rets_year_averages = df_total_rets_year.describe().loc[["mean"]]
df_total_rets_year_averages = df_total_rets_year_averages.T
df_total_rets_year_averages
fig = px.bar(df_total_rets_year_averages, x=df_total_rets_year_averages.index, y='mean',
             color=df_total_rets_year_averages.index,
             labels={'mean':'Average Return', 'index':'Stocks'}, height=600, title="Average Stock returns from 2000 to 2020" )
fig.show()
# Subtract 1 from every mean and redraw the same chart.
# NOTE(review): presumably the CSV stores gross returns (1 + r), making this the net return - confirm.
df_total_rets_year_averages["mean"] = df_total_rets_year_averages["mean"] - 1
fig = px.bar(df_total_rets_year_averages, x=df_total_rets_year_averages.index, y='mean',
             color=df_total_rets_year_averages.index,
             labels={'mean':'Average Return', 'index':'Stocks'}, height=600, title="Average Stock returns from 2000 to 2020" )
fig.show()
# Load the yearly and monthly joined tables and give both the same readable column names.
_SECTOR_COLUMNS = [
    'Total Stock Market', 'Communications', 'Materials',
    'Consumer Discretionary', 'Consumer Staples', 'Energy', 'Financials',
    'Health Care', 'Industrial', 'Technology', 'Real Estate', 'Utilities',
    'BioTech', 'Defense', 'Semiconductor', 'Home Construction',
]
_DISASTER_COLUMNS = ['Number of Disasters', 'Total Deaths', 'No Affected']
df_year_all_joined = pd.read_csv('./data/year_all_joined.csv')
df_month_all_joined = pd.read_csv('./data/month_all_joined.csv')
# Yearly table: 'Year' key, then the sector columns, then the disaster columns.
df_year_all_joined.columns = ['Year'] + _SECTOR_COLUMNS + _DISASTER_COLUMNS
df_year_all_joined = df_year_all_joined.set_index("Year")
df_year_all_joined.head(2)
# Monthly table: same layout with an extra 'Month' key, indexed by (Year, Month).
df_month_all_joined.columns = ['Year', 'Month'] + _SECTOR_COLUMNS + _DISASTER_COLUMNS
df_month_all_joined = df_month_all_joined.set_index(["Year","Month"])
def _plot_yearly_disaster_bar(column, title, ylabel, color):
    """Draw one column of df_year_all_joined as a bar chart and return its Axes.

    The y-limits run from (column minimum - one standard deviation) to
    (column maximum + one standard deviation). The figure is shown before returning.
    """
    series = df_year_all_joined[column]
    pad = series.std()
    ax = series.plot(kind="bar", title=title, color=color, figsize=(7,5))
    ax.set_xlabel("Year")
    ax.set_ylabel(ylabel)
    # Set the limits on this chart's own axes rather than on pyplot's "current" axes.
    ax.set_ylim(series.min() - pad, series.max() + pad)
    plt.show()
    return ax


def plot_deaths():
    """Bar chart of the 'Total Deaths' column of df_year_all_joined; returns the Axes."""
    return _plot_yearly_disaster_bar(
        "Total Deaths",
        title='Deaths-Natural disasters in USA (2000 - 2020)',
        ylabel="No of deaths",
        color="#bf6270",
    )


def plot_affected():
    """Bar chart of the 'No Affected' column of df_year_all_joined; returns the Axes."""
    return _plot_yearly_disaster_bar(
        "No Affected",
        title='Affected-Natural disasters in USA (2000 - 2020)',
        ylabel="No of affected people",
        color="orange",
    )


plot_deaths()
plot_affected()
# Reload the yearly joined table and keep only its sector columns, with 'Year' as a regular column.
_SECTOR_COLUMNS = [
    'Total Stock Market', 'Communications', 'Materials',
    'Consumer Discretionary', 'Consumer Staples', 'Energy', 'Financials',
    'Health Care', 'Industrial', 'Technology', 'Real Estate', 'Utilities',
    'BioTech', 'Defense', 'Semiconductor', 'Home Construction',
]
df_year_all_joined = pd.read_csv('./data/year_all_joined.csv')
df_year_all_joined.columns = ['Year'] + _SECTOR_COLUMNS + ['Number of Disasters', 'Total Deaths', 'No Affected']
df_year_all_joined = df_year_all_joined.set_index("Year")
df_year_all_joined.head(2)
# Drop the three disaster columns by selecting the sector columns only.
df_sector_jumps_totals_sliced = df_year_all_joined[_SECTOR_COLUMNS]
df_sector_jumps_totals_sliced.head(4)
# Move 'Year' from the index back to the first column.
df_sector_jumps_totals_sliced = df_sector_jumps_totals_sliced.reset_index()
#df_sector_jumps_totals
# Reshape the wide table (one row per year, one column per sector) into long form:
# one [year, sector, jumpcount] record per cell, ordered year by year and, within a
# year, in the table's column order.
# The first column holds the year; every remaining column is a sector.
_sector_columns = list(df_sector_jumps_totals_sliced.columns[1:])
data_list = [
    [int(_values[0]), _sector, int(_jump)]
    # Plain tuples give positional access without relying on integer keys being
    # treated as positions on a label-indexed Series (deprecated in pandas).
    for _values in df_sector_jumps_totals_sliced.itertuples(index=False, name=None)
    for _sector, _jump in zip(_sector_columns, _values[1:])
]
# Passing the names at construction also yields a well-formed empty frame when there are no rows.
df_sector_jumps_totals = pd.DataFrame(data_list, columns=["year", "sector", "jumpcount"])
df_sector_jumps_totals.head(5)
# Bubble scatter of jump counts per year: first for every sector, then for 'Communications' only.
# Both figures share the same styling arguments.
_scatter_options = dict(
    x="year", y="jumpcount", size="jumpcount", color="sector",
    hover_name="sector", log_x=True, size_max=60,
)
fig = px.scatter(df_sector_jumps_totals, **_scatter_options)
fig.show()
# Keep only the rows whose sector is 'Communications'.
df_sector_jumps_totals_Communications = df_sector_jumps_totals[df_sector_jumps_totals["sector"] == 'Communications']
fig = px.scatter(df_sector_jumps_totals_Communications, **_scatter_options)
fig.show()
def plot_sector_jumps_totals_by_sector(sector):
    """Return a bubble scatter of jump counts per year for a single sector.

    Filters df_sector_jumps_totals to the rows whose 'sector' equals ``sector``
    and returns the resulting Plotly figure without showing it.
    """
    is_selected = df_sector_jumps_totals["sector"] == sector
    return px.scatter(
        df_sector_jumps_totals[is_selected],
        x="year",
        y="jumpcount",
        size="jumpcount",
        color="sector",
        hover_name="sector",
        log_x=True,
        size_max=60,
    )
# Show the chart for one sector, then build an interactive selector over all sectors.
ax = plot_sector_jumps_totals_by_sector("BioTech")
ax.show()
# The 'sector' column holds one entry per (year, sector) row, so it repeats every
# sector name; offer each distinct name once, in first-seen order.
interact(plot_sector_jumps_totals_by_sector, sector=list(df_sector_jumps_totals["sector"].unique()))
# Load the yearly GDP / per-capita table and plot 'cap' against a rescaled 'gdp'.
# infer_datetime_format was removed from the call: it is deprecated since pandas 2.0 and
# only ever applied together with parse_dates, which is not requested here.
df_gdp_cap_yearly = pd.read_csv("./data/P1_usa_gdp_cap_data.csv")
df_gdp_cap_yearly.rename(columns={"Unnamed: 0":"year"}, inplace=True)
df_gdp_cap_yearly.set_index("year", inplace=True)
df_gdp_cap_yearly.head(2)
df_gdp_cap_yearly.describe()
# NOTE(review): the target range is hard-coded; presumably it is the min/max of 'cap'
# read off describe() above - confirm against the data.
df_gdp_cap_yearly['Scaled gdp'] = scaler(df_gdp_cap_yearly['gdp'], [36334.908780,65112.000])
df_gdp_cap_yearly.head(3)
ax = df_gdp_cap_yearly[['cap']].plot(figsize=(20,10))
# Overlay the rescaled GDP on the same axes
df_gdp_cap_yearly[['Scaled gdp']].plot(ax=ax)
# Set the legend of the figure
ax.legend(["Per Capita Income", "GDP"]);
# Load the scaled yearly table, indexed by its "Year" column.
# infer_datetime_format was removed from the call: it is deprecated since pandas 2.0 and
# only ever applied together with parse_dates, which is not requested here.
df_year = pd.read_csv("./data/df_year_scaled.csv")
df_year.set_index("Year",inplace=True)
df_year.head(2)
# hline_def=df_year['Scaled Affected'].rolling(2).cov(df_cat_defensive['Scaled Average'])
# hline_def_scaled=scaler(hline_def, [0,.03])
# hline_agg=df_year['Scaled Affected'].rolling(2).cov(df_cat_aggressive['Scaled Average'])
# hline_agg_scaled=scaler(hline_agg, [0,.03])
# hline_rets=df_year['Scaled Affected'].rolling(2).cov(df_total_rets_year['Total Stock Market']-1)
# hline_rets_scaled=scaler(hline_rets, [0,.03])
# hline_def_scaled.plot()
# Parallel-categories plots of combined_df, once sorted by number of disasters and once
# sorted by stock-market jumps. Both figures use the same dimensions and axis labels.
# Dimensions are given as column names, which is the documented form for this argument.
_PARALLEL_DIMENSIONS = ['Total Stock Market', 'Number of Disasters', 'Total Deaths', 'No Affected', 'Average Std Dev']
# Keys must match column names exactly; the std-dev label is keyed by 'Average Std Dev'
# (the column actually plotted) so that it is applied.
_PARALLEL_LABELS = {
    'No Affected': 'Total Number of People Affected',
    'Average Std Dev': 'Average Rolling Standard Deviation of Returns',
    'Total Stock Market': 'Stock Market Jumps',
}
# Sorted by number of disasters, largest first; the index is exposed as a 'Year' column for colouring.
combined_df_disaster_sorted = combined_df.sort_values('Number of Disasters', ascending=False)
combined_df_disaster_sorted.index.name = 'Year'
figi_disaster = px.parallel_categories(combined_df_disaster_sorted.reset_index(), height=800,
                                       dimensions=_PARALLEL_DIMENSIONS, color='Year',
                                       labels=_PARALLEL_LABELS)
figi_disaster.show()
# Sorted by the 'Total Stock Market' column (jump counts), largest first.
combined_df_jump_sorted = combined_df.sort_values('Total Stock Market', ascending=False)
combined_df_jump_sorted.index.name = 'Year'
figi_jump = px.parallel_categories(combined_df_jump_sorted.reset_index(), height=800,
                                   dimensions=_PARALLEL_DIMENSIONS, color='Year',
                                   labels=_PARALLEL_LABELS)
figi_jump.show()
# Load the sector-percentage and GDP-by-sector tables.
# infer_datetime_format was removed from both calls: it is deprecated since pandas 2.0 and
# only ever applied together with parse_dates, which is not requested here.
df_sector_pct = pd.read_csv("./data/df_sector_pct.csv")
df_gdp_sectors = pd.read_csv("./data/df_gdp_sectors.csv")
# The unnamed first column of the percentage table becomes its 'year' index.
df_sector_pct.rename(columns={"Unnamed: 0":"year"},inplace=True)
df_sector_pct.set_index("year",inplace=True)
# The GDP table is indexed by its "Industry Title" column.
df_gdp_sectors.set_index("Industry Title",inplace=True)
df_sector_pct.head(2)
df_gdp_sectors.head(2)
def show_gdp(year):
    """Return a pie chart of one column of df_gdp_sectors.

    ``year`` is the column label; slice sizes come from that column and
    slice names from the frame's index. The figure is returned, not shown.
    """
    slice_names = df_gdp_sectors.index
    year_values = df_gdp_sectors[year]
    return px.pie(year_values, values=year, names=slice_names)


# Interactive selector offering every column label of df_gdp_sectors.
interact(show_gdp, year=df_gdp_sectors.columns)
# def show_gdp(year):
# my_fig=px.pie(df_sector4[year], values=year,names=df_sector4.index)
# return my_fig
# interact(show_gdp, year=df_sector4.columns)
#px.plot(df_gdp_sectors.index, values=df_gdp_sectors.index, names=df_gdp_sectors.index)
# Preview the pie chart for the '2000' column.
show_gdp('2000')
df_gdp_sectors.head(3)
# Row-wise mean over the numeric columns, stored as a new 'average' column.
_row_means = df_gdp_sectors.mean(axis=1, numeric_only=True)
df_gdp_sectors['average'] = _row_means
df_gdp_sectors
def show_gdp_avg_20yrs():
    """Return a pie chart of the 'average' column of df_gdp_sectors.

    Slice sizes come from the 'average' column and slice names from the
    frame's index. The figure is returned, not shown.
    """
    return px.pie(
        df_gdp_sectors,
        values=df_gdp_sectors["average"],
        names=df_gdp_sectors.index,
    )


show_gdp_avg_20yrs()
df_gdp_sectors.describe()
# Total of the per-row averages.
df_gdp_sectors['average'].sum()